========= loading data =========
m1 = read.csv('~/Desktop/NCSA/CGManalyzer-datasets/ID01.csv')
m2 = read.csv('~/Desktop/NCSA/CGManalyzer-datasets/ID02.csv')
m3 = read.csv('~/Desktop/NCSA/CGManalyzer-datasets/ID03.csv')
m4 = read.csv('~/Desktop/NCSA/CGManalyzer-datasets/ID11.csv')
m5 = read.csv('~/Desktop/NCSA/CGManalyzer-datasets/ID12.csv')
m6 = read.csv('~/Desktop/NCSA/CGManalyzer-datasets/ID13.csv')
m7 = read.csv('~/Desktop/NCSA/CGManalyzer-datasets/ID21.csv')
m8 = read.csv('~/Desktop/NCSA/CGManalyzer-datasets/ID22.csv')
m9 = read.csv('~/Desktop/NCSA/CGManalyzer-datasets/ID23.csv')
m10 = read.csv('~/Desktop/NCSA/CGManalyzer-datasets/ID29.csv')
m11 = read.csv('~/Desktop/NCSA/CGManalyzer-datasets/ID30.csv')
m12 = read.csv('~/Desktop/NCSA/CGManalyzer-datasets/ID31.csv')
training_frame = rbind.data.frame(
m1$glucoseValue,
m2$glucoseValue,
m3$glucoseValue,
m4$glucoseValue,
m5$glucoseValue,
m6$glucoseValue,
m7$glucoseValue,
m8$glucoseValue,
m9$glucoseValue,
m10$glucoseValue,
m11$glucoseValue,
m12$glucoseValue
)
training_frame
========= loading packages ===========
require(dtwclust)
Loading required package: dtwclust
Loading required package: proxy
Attaching package: ‘proxy’
The following objects are masked from ‘package:stats’:
as.dist, dist
The following object is masked from ‘package:base’:
as.matrix
Loading required package: dtw
Loaded dtw v1.21-3. See ?dtw for help, citation("dtw") for use in publication.
Registered S3 method overwritten by 'dplyr':
method from
print.rowwise_df
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
dtwclust:
Setting random number generator to L'Ecuyer-CMRG (see RNGkind()).
To read the included vignettes type: browseVignettes("dtwclust").
See news(package = "dtwclust") after package updates.
require(mcclust)
Loading required package: mcclust
Loading required package: lpSolve
require(ClusterR)
Loading required package: ClusterR
Loading required package: gtools
======== HIERARCHICAL => Raw ===========
clust.hier_raw <- tsclust(training_frame, type = "h", k = 4L, distance = "dtw2", trace=TRUE, control = hierarchical_control(method = "ward.D"))
Calculating distance matrix...
Performing hierarchical clustering...
Extracting centroids...
Elapsed time is 114.916 seconds.
plot(clust.hier_raw, type="sc")
plot(clust.hier_raw)
t(cbind(training_frame[,0], cluster = clust.hier_raw@cluster))
1 2 3 4 5 6 7 8 9 10 11 12
cluster 1 1 2 3 3 3 1 4 3 4 4 4
l_hier <- clust.hier_raw@cluster
m_hier <- c(1,1,1,3,3,3,2,2,2,4,4,4)
plot(range(1:12),range(1:4), type='n')
points(m_hier, col='red')
lines(l_hier, col='green')
predict(clust.hier_raw,newdata=unlist(m3$glucoseValue))
[1] 2
predict(clust.hier_raw,newdata=unlist(m6$glucoseValue))
[1] 3
predict(clust.hier_raw,newdata=unlist(m9$glucoseValue))
[1] 3
predict(clust.hier_raw,newdata=unlist(m12$glucoseValue))
[1] 4
index_hier_raw=arandi(l_hier,m_hier)
unadjusted_hier_raw=arandi(l_hier,m_hier,adjust=FALSE)
index_hier_raw
[1] 0.3966245
unadjusted_hier_raw
[1] 0.8030303
=========== Partitional => Raw ===========
clust.par_raw <- tsclust(training_frame, type = "partitional", k = 4L, distance = "dtw2", trace=TRUE)
Precomputing distance matrix...
Iteration 1: Changes / Distsum = 12 / 659.1767
Iteration 2: Changes / Distsum = 1 / 489.335
Iteration 3: Changes / Distsum = 0 / 489.335
Elapsed time is 62.314 seconds.
plot(clust.par_raw, type="sc")
t(cbind(training_frame[,0], cluster = clust.par_raw@cluster))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
cluster 3 3 4 2 3 2 3 1 2 1 1 1
l_par <- clust.par_raw@cluster
m_par <- c(4,4,4,1,1,1,2,2,2,3,3,3)
plot(range(1:12),range(1:4), type='n')
points(m_par, col='red')
lines(l_par, col='green')
predict(clust.par_raw,newdata=unlist(m3$glucoseValue))
[1] 4
predict(clust.par_raw,newdata=unlist(m6$glucoseValue))
[1] 1
predict(clust.par_raw,newdata=unlist(m9$glucoseValue))
[1] 2
predict(clust.par_raw,newdata=unlist(m12$glucoseValue))
[1] 1
index_par_raw=arandi(l_par,m_par)
unadjusted_par_raw=arandi(l_par,m_par,adjust=FALSE)
index_par_raw
[1] 0.2109705
unadjusted_par_raw
[1] 0.7424242
========== K Means => Raw ===========
kmeans_cluster_raw = KMeans_rcpp(training_frame,clusters=4)
l_kmeans_raw = kmeans_cluster_raw$cluster
l_kmeans_raw
[1] 3 4 1 2 2 4 3 4 2 4 4 4
m_kmeans_raw <- c(1,1,1,2,2,2,3,3,3,4,4,4)
index_kmeans_raw = arandi(l_kmeans_raw,m_kmeans_raw)
unadjusted_kmeans_raw = arandi(l_kmeans_raw,m_kmeans_raw,adjust = TRUE)
index_kmeans_raw
[1] 0.04528302
unadjusted_kmeans_raw
[1] 0.04528302
========== Linear Scaling ============
linearScaling = function(data){
scaled = c()
for (i in 1:length(data)) {
scaled[i] = (data[i]-min(data))/(max(data)-min(data))
#print(scaled[i])
}
return(scaled)
}
training_frame_scaled<-rbind.data.frame(
linearScaling(m1$glucoseValue),
linearScaling(m2$glucoseValue),
linearScaling(m3$glucoseValue),
linearScaling(m4$glucoseValue),
linearScaling(m5$glucoseValue),
linearScaling(m6$glucoseValue),
linearScaling(m7$glucoseValue),
linearScaling(m8$glucoseValue),
linearScaling(m9$glucoseValue),
linearScaling(m10$glucoseValue),
linearScaling(m11$glucoseValue),
linearScaling(m12$glucoseValue)
)
training_frame_scaled
======== HIERARCHICAL => Scaled ===========
clust.hier_scaled <- tsclust(training_frame_scaled, type = "h", k = 4L, distance = "dtw2", trace=TRUE, control = hierarchical_control(method = "ward.D"))
Calculating distance matrix...
Performing hierarchical clustering...
Extracting centroids...
Elapsed time is 100.308 seconds.
plot(clust.hier_scaled, type="sc")
plot(clust.hier_scaled)
t(cbind(training_frame_scaled[,0], cluster = clust.hier_scaled@cluster))
1 2 3 4 5 6 7 8 9 10 11 12
cluster 1 2 3 4 2 4 2 4 3 4 4 4
l_hier <- clust.hier_scaled@cluster
m_hier <- c(1,1,1,3,3,3,2,2,2,4,4,4)
plot(range(1:12),range(1:4), type='n')
points(m_hier, col='red')
lines(l_hier, col='green')
predict(clust.hier_scaled,newdata=unlist(linearScaling(m3$glucoseValue)))
[1] 3
predict(clust.hier_scaled,newdata=unlist(linearScaling(m6$glucoseValue)))
[1] 3
predict(clust.hier_scaled,newdata=unlist(linearScaling(m9$glucoseValue)))
[1] 3
predict(clust.hier_scaled,newdata=unlist(linearScaling(m12$glucoseValue)))
[1] 3
index_hier_scaled=arandi(l_hier,m_hier)
unadjusted_hier_scaled=arandi(l_hier,m_hier,adjust=FALSE)
index_hier_scaled
[1] 0.04528302
unadjusted_hier_scaled
[1] 0.6515152
=========== Partitional => Scaled ===========
clust.par_scaled <- tsclust(training_frame_scaled, type = "partitional", k = 4L, distance = "dtw2", trace=TRUE)
Precomputing distance matrix...
Iteration 1: Changes / Distsum = 12 / 37.12822
Iteration 2: Changes / Distsum = 1 / 32.85783
Iteration 3: Changes / Distsum = 0 / 32.85783
Elapsed time is 54.819 seconds.
plot(clust.par_scaled, type="sc")
t(cbind(training_frame_scaled[,0], cluster = clust.par_scaled@cluster))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
cluster 3 2 4 1 2 1 2 1 2 1 4 4
l_par <- clust.par_scaled@cluster
m_par <- c(3,3,3,1,1,1,2,2,2,4,4,4)
plot(range(1:12),range(1:4), type='n')
points(m_par, col='red')
lines(l_par, col='green')
predict(clust.par_raw,newdata=unlist(linearScaling(m3$glucoseValue)))
[1] 1
predict(clust.par_raw,newdata=unlist(linearScaling(m6$glucoseValue)))
[1] 1
predict(clust.par_raw,newdata=unlist(linearScaling(m9$glucoseValue)))
[1] 1
predict(clust.par_raw,newdata=unlist(linearScaling(m12$glucoseValue)))
[1] 1
index_par_scaled=arandi(l_par,m_par)
unadjusted_par_scaled=arandi(l_par,m_par,adjust=FALSE)
index_par_scaled
[1] 0.02531646
unadjusted_par_scaled
[1] 0.6818182
========== K Means => Scaled ===========
kmeans_cluster_scaled = KMeans_rcpp(training_frame_scaled,clusters=4)
l_kmeans_scaled = kmeans_cluster_scaled$cluster
l_kmeans_scaled
[1] 3 2 3 1 2 1 3 4 4 2 1 4
m_kmeans_scaled <- c(3,3,3,2,2,2,4,4,4,1,1,1)
index_kmeans_scaled = arandi(l_kmeans_scaled,m_kmeans_scaled)
unadjusted_kmeans_scaled = arandi(l_kmeans_scaled,m_kmeans_scaled,adjust = TRUE)
index_kmeans_scaled
[1] 0.08333333
unadjusted_kmeans_scaled
[1] 0.08333333
=========== Z Score Normalization =============
training_frame_zscore<-rbind.data.frame(
zscore(m1$glucoseValue),
zscore(m2$glucoseValue),
zscore(m3$glucoseValue),
zscore(m4$glucoseValue),
zscore(m5$glucoseValue),
zscore(m6$glucoseValue),
zscore(m7$glucoseValue),
zscore(m8$glucoseValue),
zscore(m9$glucoseValue),
zscore(m10$glucoseValue),
zscore(m11$glucoseValue),
zscore(m12$glucoseValue)
)
training_frame_zscore
======== HIERARCHICAL => Z score ===========
clust.hier_zscore <- tsclust(training_frame_zscore, type = "h", k = 4L, distance = "dtw2", trace=TRUE, control = hierarchical_control(method = "ward.D"))
Calculating distance matrix...
Performing hierarchical clustering...
Extracting centroids...
Elapsed time is 139.071 seconds.
plot(clust.hier_zscore, type="sc")
plot(clust.hier_zscore)
t(cbind(training_frame_zscore[,0], cluster = clust.hier_zscore@cluster))
1 2 3 4 5 6 7 8 9 10 11 12
cluster 1 1 2 3 1 3 1 3 4 4 4 2
l_hier <- clust.hier_zscore@cluster
m_hier <- c(1,1,1,3,3,3,2,2,2,4,4,4)
plot(range(1:12),range(1:4), type='n')
points(m_hier, col='red')
lines(l_hier, col='green')
predict(clust.hier_zscore,newdata=unlist(zscore(m3$glucoseValue)))
[1] 2
predict(clust.hier_zscore,newdata=unlist(zscore(m6$glucoseValue)))
[1] 2
predict(clust.hier_zscore,newdata=unlist(zscore(m9$glucoseValue)))
[1] 4
predict(clust.hier_zscore,newdata=unlist(zscore(m12$glucoseValue)))
[1] 2
index_hier_zscore=arandi(l_hier,m_hier)
unadjusted_hier_zscore=arandi(l_hier,m_hier,adjust=FALSE)
index_hier_zscore
[1] 0.06278027
unadjusted_hier_zscore
[1] 0.7121212
=========== Partitional => Zscore ===========
clust.par_zscore <- tsclust(training_frame_zscore, type = "partitional", k = 4L, distance = "dtw2", trace=TRUE)
Precomputing distance matrix...
Iteration 1: Changes / Distsum = 12 / 199.0184
Iteration 2: Changes / Distsum = 2 / 192.3014
Iteration 3: Changes / Distsum = 1 / 161.7827
Iteration 4: Changes / Distsum = 0 / 161.7827
Elapsed time is 69.23 seconds.
plot(clust.par_zscore, type="sc")
t(cbind(training_frame_zscore[,0], cluster = clust.par_zscore@cluster))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
cluster 3 2 1 1 2 1 2 1 2 4 4 1
l_par <- clust.par_zscore@cluster
m_par <- c(3,3,3,1,1,1,2,2,2,4,4,4)
plot(range(1:12),range(1:4), type='n')
points(m_par, col='red')
lines(l_par, col='green')
predict(clust.par_zscore,newdata=unlist(zscore(m3$glucoseValue)))
[1] 1
predict(clust.par_zscore,newdata=unlist(zscore(m6$glucoseValue)))
[1] 2
predict(clust.par_zscore,newdata=unlist(zscore(m9$glucoseValue)))
[1] 2
predict(clust.par_zscore,newdata=unlist(zscore(m12$glucoseValue)))
[1] 1
index_par_zscore=arandi(l_par,m_par)
unadjusted_par_zscore=arandi(l_par,m_par,adjust=FALSE)
index_par_zscore
[1] -0.007968127
unadjusted_par_zscore
[1] 0.6515152
========== K Means => Zscore ===========
kmeans_cluster_zscore = KMeans_rcpp(training_frame_zscore,clusters=4)
l_kmeans_zscore = kmeans_cluster_zscore$cluster
l_kmeans_zscore
[1] 3 2 2 1 2 1 3 4 2 2 1 4
m_kmeans_zscore <- c(2,2,2,3,3,3,4,4,4,1,1,1)
index_kmeans_zscore = arandi(l_kmeans_scaled,m_kmeans_scaled)
unadjusted_kmeans_zscore = arandi(l_kmeans_scaled,m_kmeans_scaled,adjust = TRUE)
index_kmeans_zscore
[1] 0.08333333
unadjusted_kmeans_zscore
[1] 0.08333333
=========Plotting============
colors = c('red','blue','green')
c_types = c('Hierarchical Clustering','Partitional Clustering','K Means Clustering')
t_types = c('Raw', 'Min-Max Scaling','Z-Score Normalization')
index_all = c(index_hier_raw,index_hier_scaled,index_hier_zscore, index_par_raw, index_par_scaled, index_par_zscore, index_kmeans_raw, index_kmeans_scaled, index_kmeans_zscore)
plot(index_all,xaxt='n',type='h', col = colors, xlab="Clustering Methods", ylab="Adjusted Random Index", main='Adjusted Random Index: Hierarchical, Partitional & K-Means Clustering', lwd=4)
axis(1, at=seq(2,9,by=3), labels=c_types[1:3])
legend("topright",t_types, col = colors, title = 'Transformation Types', lwd=2,cex=.75)
unadjusted_all = c(unadjusted_hier_raw, unadjusted_hier_scaled, unadjusted_hier_zscore, unadjusted_par_raw, unadjusted_par_scaled, unadjusted_par_zscore, unadjusted_kmeans_raw, unadjusted_kmeans_scaled, unadjusted_kmeans_zscore)
plot(unadjusted_all,xaxt='n',type='h', col = colors, xlab="Clustering Methods", ylab="Unadjusted Random Index", main='Unadjusted Index: Hierarchical, Partitional & K-Means Clustering', lwd=4)
axis(1, at=seq(2,9,by=3), labels=c_types[1:3])
legend("topright",t_types, col = colors, title = 'Transformation Types', lwd=2, cex=0.75)